{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "from download_data import multi_p_run, put_worker, test_worker, download_mp4, download_align"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "tot_movies=35"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## TEST"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34]\n",
      "5\n",
      "35 >> [[0, 7], [7, 14], [14, 21], [21, 28], [28, 35]]\n",
      "[{'succ': {0, 1, 2, 3, 4, 5, 6}, 'fail': set()}, {'succ': {7, 8, 9, 10, 11, 12, 13}, 'fail': set()}, {'succ': {14, 15, 16, 17, 18, 19, 20}, 'fail': set()}, {'succ': {21, 22, 23, 24, 25, 26, 27}, 'fail': set()}, {'succ': {32, 33, 34, 28, 29, 30, 31}, 'fail': set()}]\n"
     ]
    }
   ],
   "source": [
    "res = multi_p_run(tot_movies, put_worker, test_worker, params={}, n_process=5)\n",
    "print (res)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Download Data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Aligns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s0/align/s0.tar && tar -xvf s0.tar\n",
      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s1/align/s1.tar && tar -xvf s1.tar\n",
      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s2/align/s2.tar && tar -xvf s2.tar\n",
      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s3/align/s3.tar && tar -xvf s3.tar\n",
      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s4/align/s4.tar && tar -xvf s4.tar\n",
      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s5/align/s5.tar && tar -xvf s5.tar\n",
      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s6/align/s6.tar && tar -xvf s6.tar\n",
      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s7/align/s7.tar && tar -xvf s7.tar\n",
      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s8/align/s8.tar && tar -xvf s8.tar\n",
      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s9/align/s9.tar && tar -xvf s9.tar\n",
      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s10/align/s10.tar && tar -xvf s10.tar\n",
      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s11/align/s11.tar && tar -xvf s11.tar\n",
      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s12/align/s12.tar && tar -xvf s12.tar\n",
      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s13/align/s13.tar && tar -xvf s13.tar\n",
      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s14/align/s14.tar && tar -xvf s14.tar\n",
      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s15/align/s15.tar && tar -xvf s15.tar\n",
      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s16/align/s16.tar && tar -xvf s16.tar\n",
      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s17/align/s17.tar && tar -xvf s17.tar\n",
      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s18/align/s18.tar && tar -xvf s18.tar\n",
      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s19/align/s19.tar && tar -xvf s19.tar\n",
      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s20/align/s20.tar && tar -xvf s20.tar\n",
      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s21/align/s21.tar && tar -xvf s21.tar\n",
      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s22/align/s22.tar && tar -xvf s22.tar\n",
      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s23/align/s23.tar && tar -xvf s23.tar\n",
      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s24/align/s24.tar && tar -xvf s24.tar\n",
      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s25/align/s25.tar && tar -xvf s25.tar\n",
      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s26/align/s26.tar && tar -xvf s26.tar\n",
      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s27/align/s27.tar && tar -xvf s27.tar\n",
      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s28/align/s28.tar && tar -xvf s28.tar\n",
      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s29/align/s29.tar && tar -xvf s29.tar\n",
      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s30/align/s30.tar && tar -xvf s30.tar\n",
      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s31/align/s31.tar && tar -xvf s31.tar\n",
      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s32/align/s32.tar && tar -xvf s32.tar\n",
      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s33/align/s33.tar && tar -xvf s33.tar\n",
      "cd ../data/align && wget http://spandh.dcs.shef.ac.uk/gridcorpus/s34/align/s34.tar && tar -xvf s34.tar\n"
     ]
    }
   ],
   "source": [
    "align_path = '../data/align'\n",
    "os.makedirs(align_path, exist_ok=True)\n",
    "\n",
    "res = download_align(0, tot_movies, {'align_path':align_path})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34}, set())\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "print (res)\n",
    "os.system('rm -f {align_path}/*.tar && rm -f {align_path}/Thumbs.db'.format(align_path=align_path))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "### Moives(MP4s)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s0/video/s0.mpg_vcd.zip --output s0.mpg_vcd.zip && unzip s0.mpg_vcd.zip\n",
      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s1/video/s1.mpg_vcd.zip --output s1.mpg_vcd.zip && unzip s1.mpg_vcd.zip\n",
      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s2/video/s2.mpg_vcd.zip --output s2.mpg_vcd.zip && unzip s2.mpg_vcd.zip\n",
      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s3/video/s3.mpg_vcd.zip --output s3.mpg_vcd.zip && unzip s3.mpg_vcd.zip\n",
      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s4/video/s4.mpg_vcd.zip --output s4.mpg_vcd.zip && unzip s4.mpg_vcd.zip\n",
      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s5/video/s5.mpg_vcd.zip --output s5.mpg_vcd.zip && unzip s5.mpg_vcd.zip\n",
      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s6/video/s6.mpg_vcd.zip --output s6.mpg_vcd.zip && unzip s6.mpg_vcd.zip\n",
      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s7/video/s7.mpg_vcd.zip --output s7.mpg_vcd.zip && unzip s7.mpg_vcd.zip\n",
      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s8/video/s8.mpg_vcd.zip --output s8.mpg_vcd.zip && unzip s8.mpg_vcd.zip\n",
      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s9/video/s9.mpg_vcd.zip --output s9.mpg_vcd.zip && unzip s9.mpg_vcd.zip\n",
      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s10/video/s10.mpg_vcd.zip --output s10.mpg_vcd.zip && unzip s10.mpg_vcd.zip\n",
      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s11/video/s11.mpg_vcd.zip --output s11.mpg_vcd.zip && unzip s11.mpg_vcd.zip\n",
      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s12/video/s12.mpg_vcd.zip --output s12.mpg_vcd.zip && unzip s12.mpg_vcd.zip\n",
      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s13/video/s13.mpg_vcd.zip --output s13.mpg_vcd.zip && unzip s13.mpg_vcd.zip\n",
      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s14/video/s14.mpg_vcd.zip --output s14.mpg_vcd.zip && unzip s14.mpg_vcd.zip\n",
      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s15/video/s15.mpg_vcd.zip --output s15.mpg_vcd.zip && unzip s15.mpg_vcd.zip\n",
      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s16/video/s16.mpg_vcd.zip --output s16.mpg_vcd.zip && unzip s16.mpg_vcd.zip\n",
      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s17/video/s17.mpg_vcd.zip --output s17.mpg_vcd.zip && unzip s17.mpg_vcd.zip\n",
      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s18/video/s18.mpg_vcd.zip --output s18.mpg_vcd.zip && unzip s18.mpg_vcd.zip\n",
      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s19/video/s19.mpg_vcd.zip --output s19.mpg_vcd.zip && unzip s19.mpg_vcd.zip\n",
      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s20/video/s20.mpg_vcd.zip --output s20.mpg_vcd.zip && unzip s20.mpg_vcd.zip\n",
      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s21/video/s21.mpg_vcd.zip --output s21.mpg_vcd.zip && unzip s21.mpg_vcd.zip\n",
      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s22/video/s22.mpg_vcd.zip --output s22.mpg_vcd.zip && unzip s22.mpg_vcd.zip\n",
      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s23/video/s23.mpg_vcd.zip --output s23.mpg_vcd.zip && unzip s23.mpg_vcd.zip\n",
      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s24/video/s24.mpg_vcd.zip --output s24.mpg_vcd.zip && unzip s24.mpg_vcd.zip\n",
      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s25/video/s25.mpg_vcd.zip --output s25.mpg_vcd.zip && unzip s25.mpg_vcd.zip\n",
      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s26/video/s26.mpg_vcd.zip --output s26.mpg_vcd.zip && unzip s26.mpg_vcd.zip\n",
      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s27/video/s27.mpg_vcd.zip --output s27.mpg_vcd.zip && unzip s27.mpg_vcd.zip\n",
      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s28/video/s28.mpg_vcd.zip --output s28.mpg_vcd.zip && unzip s28.mpg_vcd.zip\n",
      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s29/video/s29.mpg_vcd.zip --output s29.mpg_vcd.zip && unzip s29.mpg_vcd.zip\n",
      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s30/video/s30.mpg_vcd.zip --output s30.mpg_vcd.zip && unzip s30.mpg_vcd.zip\n",
      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s31/video/s31.mpg_vcd.zip --output s31.mpg_vcd.zip && unzip s31.mpg_vcd.zip\n",
      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s32/video/s32.mpg_vcd.zip --output s32.mpg_vcd.zip && unzip s32.mpg_vcd.zip\n",
      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s33/video/s33.mpg_vcd.zip --output s33.mpg_vcd.zip && unzip s33.mpg_vcd.zip\n",
      "cd ../data/mp4s && curl http://spandh.dcs.shef.ac.uk/gridcorpus/s34/video/s34.mpg_vcd.zip --output s34.mpg_vcd.zip && unzip s34.mpg_vcd.zip\n"
     ]
    }
   ],
   "source": [
    "src_path = '../data/mp4s'\n",
    "res = download_mp4(0, tot_movies, {'src_path':src_path})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34}, set())\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "print (res)\n",
    "os.system('rm -f {src_path}/*.zip && rm -f {src_path}/*/Thumbs.db'.format(src_path=src_path))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Preprocess Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "from preprocess_data import preprocess, find_files, Video"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "tgt_path = '../data/datasets'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "os.makedirs('{tgt_path}'.format(tgt_path=tgt_path), exist_ok=True)\n",
    "os.system('rm -rf {tgt_path}'.format(tgt_path=tgt_path))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "res = preprocess(0, tot_movies, {'src_path':src_path, 'tgt_path':tgt_path})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34}, set())\n"
     ]
    }
   ],
   "source": [
    "print (res)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python [default]",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
